{
"cells": [
{
"cell_type": "markdown",
"id": "fe1ee56b-fcbd-4ee7-be59-04d8799921bf",
"metadata": {},
"source": [
"# Pyteomics"
]
},
{
"cell_type": "raw",
"id": "268917e0-6da5-4065-8c8f-b7e3710c6e80",
"metadata": {
"editable": true,
"raw_mimetype": "text/html",
"slideshow": {
"slide_type": ""
},
"tags": []
},
"source": [
"\n",
" \n",
"\n",
"
"
]
},
{
"cell_type": "markdown",
"id": "ef467988-ea67-4119-8519-57ec9c70784f",
"metadata": {},
"source": [
"## Install pyteomics"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "57cafd99-f3fb-455a-b2fa-288e6bccf374",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: pyteomics in /home/joshua/mambaforge/envs/pyopenms-viz/lib/python3.12/site-packages (4.7.5)\n",
"Collecting lxml\n",
" Using cached lxml-5.3.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (3.8 kB)\n",
"Using cached lxml-5.3.0-cp312-cp312-manylinux_2_28_x86_64.whl (4.9 MB)\n",
"Installing collected packages: lxml\n",
"Successfully installed lxml-5.3.0\n"
]
}
],
"source": [
"# Use the %pip magic rather than !pip: it installs into the environment of the\n",
"# running kernel, whereas !pip runs whichever pip is first on the shell PATH.\n",
"%pip install --quiet pyteomics lxml"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "6f116382-7fea-4435-9717-b742db4e66c6",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from pyteomics import mzml"
]
},
{
"cell_type": "markdown",
"id": "246fbbb0-8418-476e-91ad-ca5dbeb8d87f",
"metadata": {},
"source": [
"## Download Data"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "1f193209-cd41-4494-a4d4-aaeef0a132d7",
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"\n",
"url = 'https://raw.githubusercontent.com/levitsky/pyteomics/master/tests/test.mzML'\n",
"file_name = 'test.mzML'\n",
"\n",
"# Send a GET request to the URL; the timeout keeps a stalled connection from\n",
"# hanging the kernel indefinitely.\n",
"response = requests.get(url, timeout=30)\n",
"\n",
"# Fail loudly on an HTTP error (e.g. 404) instead of silently saving an error\n",
"# page under the .mzML file name and reporting success.\n",
"response.raise_for_status()\n",
"\n",
"# Save the raw bytes of the response to a file\n",
"with open(file_name, 'wb') as file:\n",
"    file.write(response.content)\n",
"\n",
"print(f'File {file_name} downloaded successfully!')"
]
},
{
"cell_type": "markdown",
"id": "ab5fc65d-90fb-41a6-a01e-28cf934d51de",
"metadata": {},
"source": [
"## Load `.mzML` file and convert to `pd.DataFrame`"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "7a0d5e8f-7fbe-4f23-af41-dd652e1c6130",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n", " | ms_level | \n", "rt | \n", "mz | \n", "int | \n", "
---|---|---|---|---|
0 | \n", "1 | \n", "0.004935 | \n", "200.000188 | \n", "0.0 | \n", "
0 | \n", "1 | \n", "0.004935 | \n", "200.00043 | \n", "0.0 | \n", "
0 | \n", "1 | \n", "0.004935 | \n", "200.000673 | \n", "0.0 | \n", "
0 | \n", "1 | \n", "0.004935 | \n", "200.000915 | \n", "0.0 | \n", "
0 | \n", "1 | \n", "0.004935 | \n", "202.605829 | \n", "0.0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
1 | \n", "1 | \n", "0.005935 | \n", "1999.913086 | \n", "0.0 | \n", "
1 | \n", "1 | \n", "0.005935 | \n", "1999.937256 | \n", "0.0 | \n", "
1 | \n", "1 | \n", "0.005935 | \n", "1999.961548 | \n", "0.0 | \n", "
1 | \n", "1 | \n", "0.005935 | \n", "1999.985718 | \n", "0.0 | \n", "
1 | \n", "1 | \n", "0.005935 | \n", "2000.009888 | \n", "0.0 | \n", "
39828 rows × 4 columns
\n", "